/*
 * Copyright (c) Kumo Inc. and affiliates.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <sys/stat.h>
#include <sys/types.h>

#include <cassert>
#include <limits>

#include <melon/portability.h>
#include <melon/range.h>
#include <melon/scope_guard.h>
#include <melon/net/network_socket.h>
#include <melon/portability/fcntl.h>
#include <melon/portability/sys_uio.h>
#include <melon/portability/unistd.h>

namespace melon {
    /**
     * Convenience wrappers around some commonly used system calls.  The *NoInt
     * wrappers retry on EINTR.  The *Full wrappers retry on EINTR and also loop
     * until all data is written.  Note that *Full wrappers weaken the thread
     * semantics of underlying system calls.
     */
    int openNoInt(const char *name, int flags, mode_t mode = 0666);

    // Two overloads, as we may be closing either a file or a socket.
    int closeNoInt(int fd);

    int closeNoInt(NetworkSocket fd);

    int dupNoInt(int fd);

    int dup2NoInt(int oldFd, int newFd);

    int fsyncNoInt(int fd);

    int fdatasyncNoInt(int fd);

    int ftruncateNoInt(int fd, off_t len);

    int truncateNoInt(const char *path, off_t len);

    int flockNoInt(int fd, int operation);

    int shutdownNoInt(NetworkSocket fd, int how);

    ssize_t readNoInt(int fd, void *buf, size_t count);

    ssize_t preadNoInt(int fd, void *buf, size_t count, off_t offset);

    ssize_t readvNoInt(int fd, const iovec *iov, int count);

    ssize_t preadvNoInt(int fd, const iovec *iov, int count, off_t offset);

    ssize_t writeNoInt(int fd, const void *buf, size_t count);

    ssize_t pwriteNoInt(int fd, const void *buf, size_t count, off_t offset);

    ssize_t writevNoInt(int fd, const iovec *iov, int count);

    ssize_t pwritevNoInt(int fd, const iovec *iov, int count, off_t offset);

    /**
     * Wrapper around read() (and pread()) that, in addition to retrying on
     * EINTR, will loop until all data is read.
     *
     * This wrapper is only useful for blocking file descriptors (for non-blocking
     * file descriptors, you have to be prepared to deal with incomplete reads
     * anyway), and only exists because POSIX allows read() to return an incomplete
     * read if interrupted by a signal (instead of returning -1 and setting errno
     * to EINTR).
     *
     * Note that this wrapper weakens the thread safety of read(): the file pointer
     * is shared between threads, but the system call is atomic.  If multiple
     * threads are reading from a file at the same time, you don't know where your
     * data came from in the file, but you do know that the returned bytes were
     * contiguous.  You can no longer make this assumption if using readFull().
     * You should probably use pread() when reading from the same file descriptor
     * from multiple threads simultaneously, anyway.
     *
     * Note that readvFull and preadvFull require iov to be non-const, unlike
     * readv and preadv.  The contents of iov after these functions return
     * is unspecified.
     */
    MELON_NODISCARD ssize_t readFull(int fd, void *buf, size_t count);

    MELON_NODISCARD ssize_t
    preadFull(int fd, void *buf, size_t count, off_t offset);

    MELON_NODISCARD ssize_t readvFull(int fd, iovec *iov, int count);

    MELON_NODISCARD ssize_t preadvFull(int fd, iovec *iov, int count, off_t offset);

    /**
     * Similar to readFull and preadFull above, wrappers around write() and
     * pwrite() that loop until all data is written.
     *
     * Generally, the write() / pwrite() system call may always write fewer bytes
     * than requested, just like read().  In certain cases (such as when writing to
     * a pipe), POSIX provides stronger guarantees, but not in the general case.
     * For example, Linux (even on a 64-bit platform) won't write more than 2GB in
     * one write() system call.
     *
     * Note that writevFull and pwritevFull require iov to be non-const, unlike
     * writev and pwritev.  The contents of iov after these functions return
     * is unspecified.
     *
     * These functions return -1 on error, or the total number of bytes written
     * (which is always the same as the number of requested bytes) on success.
     */
    ssize_t writeFull(int fd, const void *buf, size_t count);

    ssize_t pwriteFull(int fd, const void *buf, size_t count, off_t offset);

    ssize_t writevFull(int fd, iovec *iov, int count);

    ssize_t pwritevFull(int fd, iovec *iov, int count, off_t offset);

    /**
     * Read entire file (if num_bytes is defaulted) or no more than
     * num_bytes (otherwise) into container *out. The container is assumed
     * to be contiguous, with element size equal to 1, and offer size(),
     * reserve(), and random access (e.g. std::vector<char>, std::string,
     * kmstring).
     *
     * Returns: true on success or false on failure. In the latter case
     * errno will be set appropriately by the failing system primitive.
     */
    template<class Container>
    bool readFile(
        int fd,
        Container &out,
        size_t num_bytes = std::numeric_limits<size_t>::max()) {
        static_assert(
            sizeof(out[0]) == 1,
            "readFile: only containers with byte-sized elements accepted");

        size_t soFar = 0; // amount of bytes successfully read
        SCOPE_EXIT {
            assert(out.size() >= soFar); // resize better doesn't throw
            out.resize(soFar);
        };

        // Obtain file size:
        struct stat buf;
        if (fstat(fd, &buf) == -1) {
            return false;
        }
        // Some files (notably under /proc and /sys on Linux) lie about
        // their size, so treat the size advertised by fstat under advise
        // but don't rely on it. In particular, if the size is zero, we
        // should attempt to read stuff. If not zero, we'll attempt to read
        // one extra byte.
        constexpr size_t initialAlloc = 1024 * 4;
        out.resize(std::min(
            buf.st_size > 0 ? (size_t(buf.st_size) + 1) : initialAlloc, num_bytes));

        while (soFar < out.size()) {
            const auto actual = readFull(fd, &out[soFar], out.size() - soFar);
            if (actual == -1) {
                return false;
            }
            soFar += actual;
            if (soFar < out.size()) {
                // File exhausted
                break;
            }
            // Ew, allocate more memory. Use exponential growth to avoid
            // quadratic behavior. Cap size to num_bytes.
            out.resize(std::min(out.size() * 3 / 2, num_bytes));
        }

        return true;
    }

    /**
     * Same as above, but takes in a file name instead of fd
     */
    template<class Container>
    bool readFile(
        const char *file_name,
        Container &out,
        size_t num_bytes = std::numeric_limits<size_t>::max()) {
        assert(file_name);

        const auto fd = openNoInt(file_name, O_RDONLY | O_CLOEXEC);
        if (fd == -1) {
            return false;
        }

        SCOPE_EXIT {
            // Ignore errors when closing the file
            closeNoInt(fd);
        };

        return readFile(fd, out, num_bytes);
    }

    /**
     * Writes container to file. The container is assumed to be
     * contiguous, with element size equal to 1, and offering STL-like
     * methods empty(), size(), and indexed access
     * (e.g. std::vector<char>, std::string, kmstring, StringPiece).
     *
     * "flags" dictates the open flags to use. Default is to create file
     * if it doesn't exist and truncate it.
     *
     * Returns: true on success or false on failure. In the latter case
     * errno will be set appropriately by the failing system primitive.
     *
     * Note that this function may leave the file in a partially written state on
     * failure.  Use writeFileAtomic() if you want to ensure that the existing file
     * state will be unchanged on error.
     */
    template<class Container>
    bool writeFile(
        const Container &data,
        const char *filename,
        int flags = O_WRONLY | O_CREAT | O_TRUNC,
        mode_t mode = 0666) {
        static_assert(
            sizeof(data[0]) == 1, "writeFile works with element size equal to 1");
        int fd = open(filename, flags, mode);
        if (fd == -1) {
            return false;
        }
        bool ok = data.empty() ||
                  writeFull(fd, &data[0], data.size()) == static_cast<ssize_t>(data.size());
        return closeNoInt(fd) == 0 && ok;
    }

    /* For atomic writes, do we sync to guarantee ordering or not? */
    enum class SyncType {
        WITH_SYNC,
        WITHOUT_SYNC,
    };

    class WriteFileAtomicOptions {
    public:
        WriteFileAtomicOptions() = default;

        mode_t permissions{0644};
        SyncType syncType{SyncType::WITHOUT_SYNC};
        std::string temporaryDirectory;

        // The mode bits used for the temporary file
        WriteFileAtomicOptions &setPermissions(mode_t);

        // The default implementation does not sync the data to storage before the
        // rename.  Therefore, the write is *not* atomic in the event of a power
        // failure or OS crash.  To guarantee atomicity in these cases, specify
        // syncType = WITH_SYNC, which will incur a performance cost of waiting for
        // the data to be persisted to storage.  Note that the return of the function
        // does not guarantee the directory modifications have been written to disk; a
        // further sync of the directory after the function returns is required to
        // ensure the modification is durable.
        WriteFileAtomicOptions &setSyncType(SyncType);

        // The implementation creates a temporary file as an implementation detail
        // within this directory.  The temporary filenames themselves are
        // implementation defined.
        WriteFileAtomicOptions &setTemporaryDirectory(std::string);
    };

    /*
     * writeFileAtomic() does not currently work on Windows.
     * Windows does not provide atomic file renames, which makes implementing this
     * tricky.  Windows does have a MoveFileTransactedA() API which could
     * potentially be used, but according to the Microsoft documentation this API is
     * discouraged and may be removed in a future version.
     *
     * In order to implement this properly on Windows we would probably need a pair
     * of functions: one for writing the file, and one for reading the contents,
     * where the two functions synchronize with each other.  We can probably only
     * provide atomic update behavior with cooperation from the reader.
     */
#ifndef _WIN32

    /**
     * Write file contents "atomically".
     *
     * This writes the data to a temporary file in the destination directory, and
     * then renames it to the specified path.  This guarantees that the specified
     * file will be replaced the specified contents on success, or will not be
     * modified on failure.
     *
     * Note that on platforms that do not provide atomic filesystem rename
     * functionality (e.g., Windows) this behavior may not be truly atomic.
     *
     * The default implementation does not sync the data to storage before the
     * rename.  Therefore, the write is *not* atomic in the event of a power failure
     * or OS crash.  To guarantee atomicity in these cases, specify syncType =
     * WITH_SYNC, which will incur a performance cost of waiting for the data to be
     * persisted to storage.  Note that the return of the function does not
     * guarantee the directory modifications have been written to disk; a further
     * sync of the directory after the function returns is required to ensure the
     * modification is durable.
     */
    void writeFileAtomic(
        StringPiece filePath,
        iovec *iov,
        int count,
        mode_t permissions = 0644,
        SyncType syncType = SyncType::WITHOUT_SYNC);

    void writeFileAtomic(
        StringPiece filePath,
        ByteRange data,
        mode_t permissions = 0644,
        SyncType syncType = SyncType::WITHOUT_SYNC);

    void writeFileAtomic(
        StringPiece filePath,
        StringPiece data,
        mode_t permissions = 0644,
        SyncType syncType = SyncType::WITHOUT_SYNC);

    void writeFileAtomic(
        StringPiece filePath, StringPiece data, const WriteFileAtomicOptions &);

    /**
     * A version of writeFileAtomic() that returns an errno value instead of
     * throwing on error.
     *
     * Returns 0 on success or an errno value on error.
     */
    int writeFileAtomicNoThrow(
        StringPiece filePath,
        iovec *iov,
        int count,
        mode_t permissions = 0644,
        SyncType syncType = SyncType::WITHOUT_SYNC);

    int writeFileAtomicNoThrow(
        StringPiece filePath, StringPiece data, const WriteFileAtomicOptions &);

#endif // !_WIN32
} // namespace melon
